#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author:煜
@file: SpiderMan.py
@time: 2018/10/25
"""

from DataOutPut import DataOutPut
from HtmlParser import HtmlParser
from DownLoad import DownLoad

class Spider(object):
    """Crawl the paginated book index and store every book it links to.

    For each index page the spider downloads the page, extracts the book
    URLs with ``self.parser.parserIndex``, then downloads and parses each
    book's detail page and passes the result to ``self.output``.
    """

    # Index-page URL template; ``%s`` is replaced by the page number.
    INDEX_URL = 'https://www.xiashu.la/type/0_0_2_allvisit_%s.html'

    def __init__(self):
        """Create the parser, storage and download collaborators."""
        self.parser = HtmlParser()
        self.output = DataOutPut()
        self.download = DownLoad()

    def crawl(self, skip=42, pages=None):
        """Walk the index pages and store each book found on them.

        Books are numbered from 1 in the order they are encountered across
        all pages. A book that fails with an ordinary exception is reported
        and skipped so that the remaining books are still processed.

        Args:
            skip: Number of leading books to pass over without downloading
                them. Defaults to 42, the value previously hard-coded here
                (presumably a resume point from an earlier run -- confirm).
            pages: Iterable of index page numbers to visit. Defaults to
                pages 1 through 29.
        """
        if pages is None:
            pages = range(1, 30)

        position = 0
        for page in pages:
            # Index-page failures are deliberately not caught here: they
            # propagate to the caller, as they always have.
            index_html = self.download.download(self.INDEX_URL % page)
            for href in self.parser.parserIndex(index_html):
                position += 1
                if position <= skip:
                    continue
                print(position)
                try:
                    self._store_book(href)
                except Exception as error:
                    # Best effort: one broken book must not stop the crawl.
                    # ``Exception`` (not a bare ``except``) lets
                    # KeyboardInterrupt / SystemExit still end the run.
                    print('failed to store %s: %r' % (href, error))

    def _store_book(self, href):
        """Download one book detail page and save the book, chapters and remarks."""
        detail = self.parser.parserDetailPage(self.download.download(href))

        print('正在存取 %s' % detail['bookName'])
        self.output.store_data(detail)
        self.output.saveBook()

        book_id = detail['id']
        # Chapters are numbered from 1 in list order; the fourth argument is
        # always passed as an empty string.
        for number, chapter in enumerate(detail['ChapterList'], 1):
            self.output.saveChapter(number, book_id, chapter['title'], '', chapter['href'])
        # NOTE(review): ``saveReamrk`` and the ``remark_ontent`` key look
        # misspelled but must match DataOutPut / HtmlParser, which are defined
        # outside this file -- confirm before renaming.
        for remark in detail['remarkList']:
            self.output.saveReamrk(book_id, 'default.jpg', remark['remark_ontent'], remark['user_name'])
                
if __name__=='__main__':
    # Run the crawl when this file is executed as a script.
    Spider().crawl()
